#   LibAut
#       Episode criteria sensitivity 
#   Juraj Medzihorsky
#   2021-08-10

library(compiler)
library(ERT)
library(parallel)
options(mc.cores=14) # for a 16-thread Ubuntu machine

#   sessionInfo()
##  R version 4.0.4 (2021-02-15)
##  Platform: x86_64-pc-linux-gnu (64-bit)
##  Running under: Ubuntu 20.04.2 LTS
##
##  Matrix products: default
##  BLAS/LAPACK: /opt/OpenBLAS/lib/libopenblas_haswellp-r0.3.13.so
##
##  locale:
##   [1] LC_CTYPE=C.UTF-8           LC_NUMERIC=C
##   [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C.UTF-8
##   [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C.UTF-8
##   [7] LC_PAPER=C.UTF-8           LC_NAME=C
##   [9] LC_ADDRESS=C               LC_TELEPHONE=C
##  [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
##  attached base packages:
##  [1] parallel  compiler  stats     graphics  grDevices utils     datasets
##  [8] methods   base
##
##  other attached packages:
##  [1] ERT_2.0        nvimcom_0.9-92
##
##  loaded via a namespace (and not attached):
##   [1] Rcpp_1.0.6       pillar_1.4.7     miscTools_0.6-26 tools_4.0.4
##   [5] digest_0.6.27    nlme_3.1-152     lifecycle_1.0.0  tibble_3.0.6
##   [9] gtable_0.3.0     lattice_0.20-41  pkgconfig_2.0.3  rlang_0.4.10
##  [13] DBI_1.1.1        maxLik_1.4-6     hablar_0.3.0     dplyr_1.0.4
##  [17] stringr_1.4.0    generics_0.1.0   vctrs_0.3.6      gbRd_0.4-11
##  [21] lmtest_0.9-38    grid_4.0.4       tidyselect_1.1.0 glue_1.4.2
##  [25] R6_2.5.0         bdsmatrix_1.3-4  Rdpack_2.1       plm_2.4-0
##  [29] Formula_1.2-4    ggplot2_3.3.3    purrr_0.3.4      tidyr_1.1.2
##  [33] magrittr_2.0.1   scales_1.1.1     ellipsis_0.3.1   rbibutils_2.0
##  [37] MASS_7.3-53.1    assertthat_0.2.1 colorspace_2.0-0 sandwich_3.0-0
##  [41] stringi_1.5.3    munsell_0.5.0    crayon_1.4.1     zoo_1.8-8

#------------------------------------------------------------------------------

code_dir <- getwd()
data_dir <- gsub('code$', 'data', code_dir)

setwd(data_dir)
vdem <- readRDS('V-Dem-CY-Full+Others-v10.rds')
setwd(code_dir)

#------------------------------------------------------------------------------

#   # DEFAULTS
#   start_incl_0 <- 0.01
#   cum_incl_0   <- 0.1
#   year_turn_0  <- 0.03
#   cum_turn_0   <- 0.1
#   tolerance_0  <- 5

#   criteria grid
val_grid <-
    expand.grid(
        si=seq(0.005, 0.015, 0.001),
        ci=seq(0.05, 0.15, 0.01),
        yt=seq(0.01, 0.05, 0.005),
        ct=seq(0.05, 0.15, 0.01),
        to=seq(3, 7, 1)
    )

nrow(val_grid)
# 59895

#   columns to keep
to_take <- c("dem_ep_id",
             "year", 
             "dem_ep_start_year",
             "dem_ep_end_year",
             "dem_pre_ep_year",
             "dem_ep_prch",
             "dem_ep_ptr",
             "dem_ep_subdep",
             "dem_ep_outcome",
             "dem_ep_censored")

#   wrapper to find episodes for one set of criteria inputed as grid + row no
onerun <-
    cmpfun(
    function(i, take=to_take, vg=val_grid)
    {
        e <- 
            get_eps(
                data=vdem,
                start_incl=vg$si[i],
                cum_incl=vg$ci[i],
                year_turn=vg$yt[i],
                cum_turn=vg$ct[i],
                tolerance=vg$to[i]
            )
        e <- e[!is.na(e$dem_ep_id), take]
        gc()
        return(as.data.frame(e))
    })


#   estimated duration: 6-7h
test <- system.time(e1 <- onerun(1))
print(nrow(val_grid)*test/3600*(1/10))


#-------------------------------------------------------------------------------
#   whole grid
#-------------------------------------------------------------------------------

system.time(le <- mclapply(1:nrow(val_grid), onerun))

#   15 gb
print(object.size(le), units='GB')

long <- do.call(rbind, le)
long$uid <- paste0(substr(long$dem_ep_id, 1, 4), long$year)

#   remove democratic deepening episodes
old_n <- nrow(long)
long <- subset(long, (dem_ep_prch%in%1) & (dem_ep_ptr%in%0))
new_n <- nrow(long)

#   create a lean table with the found episodes
atab <- as.data.frame(with(long, table(uid, dem_ep_outcome)))
atab$country_text_id <- substr(atab$uid, 1, 3)
atab$year <- substr(atab$uid, 5, 8)

setwd(data_dir)
saveRDS(atab, 'episode_sensitivity_20210810.rds')

#   SCRIPT END